import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
# Load the customer churn dataset from the working directory and preview
# the first five rows.
df = pd.read_csv('customer_churn.csv')
df.head()
| customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | ... | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7590-VHVEG | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | 5575-GNVDE | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | ... | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | 3668-QPYBK | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | ... | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | 7795-CFOCW | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | ... | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | 9237-HQITU | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
5 rows × 21 columns
df.shape
(7043, 21)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7043 entries, 0 to 7042 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 customerID 7043 non-null object 1 gender 7043 non-null object 2 SeniorCitizen 7043 non-null int64 3 Partner 7043 non-null object 4 Dependents 7043 non-null object 5 tenure 7043 non-null int64 6 PhoneService 7043 non-null object 7 MultipleLines 7043 non-null object 8 InternetService 7043 non-null object 9 OnlineSecurity 7043 non-null object 10 OnlineBackup 7043 non-null object 11 DeviceProtection 7043 non-null object 12 TechSupport 7043 non-null object 13 StreamingTV 7043 non-null object 14 StreamingMovies 7043 non-null object 15 Contract 7043 non-null object 16 PaperlessBilling 7043 non-null object 17 PaymentMethod 7043 non-null object 18 MonthlyCharges 7043 non-null float64 19 TotalCharges 7043 non-null object 20 Churn 7043 non-null object dtypes: float64(1), int64(2), object(18) memory usage: 1.1+ MB
df.isnull().sum()
customerID 0 gender 0 SeniorCitizen 0 Partner 0 Dependents 0 tenure 0 PhoneService 0 MultipleLines 0 InternetService 0 OnlineSecurity 0 OnlineBackup 0 DeviceProtection 0 TechSupport 0 StreamingTV 0 StreamingMovies 0 Contract 0 PaperlessBilling 0 PaymentMethod 0 MonthlyCharges 0 TotalCharges 0 Churn 0 dtype: int64
df.dtypes
customerID object gender object SeniorCitizen int64 Partner object Dependents object tenure int64 PhoneService object MultipleLines object InternetService object OnlineSecurity object OnlineBackup object DeviceProtection object TechSupport object StreamingTV object StreamingMovies object Contract object PaperlessBilling object PaymentMethod object MonthlyCharges float64 TotalCharges object Churn object dtype: object
# customerID is only a row identifier, so remove it before analysis.
# `columns=` already selects the axis; no separate axis argument is needed.
df = df.drop(columns='customerID')
df.head()
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
# TotalCharges was read as text; convert it to numbers, turning any value
# that cannot be parsed into NaN, then recount the missing values per column.
total_charges_numeric = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = total_charges_numeric
df.isna().sum()
gender 0 SeniorCitizen 0 Partner 0 Dependents 0 tenure 0 PhoneService 0 MultipleLines 0 InternetService 0 OnlineSecurity 0 OnlineBackup 0 DeviceProtection 0 TechSupport 0 StreamingTV 0 StreamingMovies 0 Contract 0 PaperlessBilling 0 PaymentMethod 0 MonthlyCharges 0 TotalCharges 11 Churn 0 dtype: int64
# Fill the missing TotalCharges values with the column mean.
# fillna() returns a new object rather than modifying df, so the result has
# to be assigned back; the fill is also restricted to the TotalCharges column
# so that its mean is never written into any other column.
df['TotalCharges'] = df['TotalCharges'].fillna(df['TotalCharges'].mean())
df
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.50 | No |
| 2 | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.50 | No |
| 7039 | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.90 | No |
| 7040 | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
| 7041 | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.60 | Yes |
| 7042 | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.50 | No |
7043 rows × 20 columns
# Remove, in place, every row whose tenure is 0.
zero_tenure_rows = df.index[df['tenure'] == 0]
df.drop(index=zero_tenure_rows, inplace=True)
# Verify: the index of remaining zero-tenure rows should now be empty.
df.index[df['tenure'] == 0]
Index([], dtype='int64')
# Turn the 0/1 SeniorCitizen flag into the same "No"/"Yes" labels used by
# the other categorical columns.
senior_flag_names = {0: "No", 1: "Yes"}
df['SeniorCitizen'] = df['SeniorCitizen'].map(senior_flag_names)
df.head()
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | No | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | Male | No | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.50 | No |
| 2 | Male | No | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | Male | No | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | Female | No | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
df.describe()
| tenure | MonthlyCharges | TotalCharges | |
|---|---|---|---|
| count | 7032.000000 | 7032.000000 | 7032.000000 |
| mean | 32.421786 | 64.798208 | 2283.300441 |
| std | 24.545260 | 30.085974 | 2266.771362 |
| min | 1.000000 | 18.250000 | 18.800000 |
| 25% | 9.000000 | 35.587500 | 401.450000 |
| 50% | 29.000000 | 70.350000 | 1397.475000 |
| 75% | 55.000000 | 89.862500 | 3794.737500 |
| max | 72.000000 | 118.750000 | 8684.800000 |
df.describe(include = 'O')
| gender | SeniorCitizen | Partner | Dependents | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 |
| unique | 2 | 2 | 2 | 2 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 2 | 4 | 2 |
| top | Male | No | No | No | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | No |
| freq | 3549 | 5890 | 3639 | 4933 | 6352 | 3385 | 3096 | 3497 | 3087 | 3094 | 3472 | 2809 | 2781 | 3875 | 4168 | 2365 | 5163 |
df.describe(include = 'O').T
| count | unique | top | freq | |
|---|---|---|---|---|
| gender | 7032 | 2 | Male | 3549 |
| SeniorCitizen | 7032 | 2 | No | 5890 |
| Partner | 7032 | 2 | No | 3639 |
| Dependents | 7032 | 2 | No | 4933 |
| PhoneService | 7032 | 2 | Yes | 6352 |
| MultipleLines | 7032 | 3 | No | 3385 |
| InternetService | 7032 | 3 | Fiber optic | 3096 |
| OnlineSecurity | 7032 | 3 | No | 3497 |
| OnlineBackup | 7032 | 3 | No | 3087 |
| DeviceProtection | 7032 | 3 | No | 3094 |
| TechSupport | 7032 | 3 | No | 3472 |
| StreamingTV | 7032 | 3 | No | 2809 |
| StreamingMovies | 7032 | 3 | No | 2781 |
| Contract | 7032 | 3 | Month-to-month | 3875 |
| PaperlessBilling | 7032 | 2 | Yes | 4168 |
| PaymentMethod | 7032 | 4 | Electronic check | 2365 |
| Churn | 7032 | 2 | No | 5163 |
df.describe(include = 'all')
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 7032 | 7032 | 7032 | 7032 | 7032.000000 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032 | 7032.000000 | 7032.000000 | 7032 |
| unique | 2 | 2 | 2 | 2 | NaN | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 2 | 4 | NaN | NaN | 2 |
| top | Male | No | No | No | NaN | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | NaN | NaN | No |
| freq | 3549 | 5890 | 3639 | 4933 | NaN | 6352 | 3385 | 3096 | 3497 | 3087 | 3094 | 3472 | 2809 | 2781 | 3875 | 4168 | 2365 | NaN | NaN | 5163 |
| mean | NaN | NaN | NaN | NaN | 32.421786 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 64.798208 | 2283.300441 | NaN |
| std | NaN | NaN | NaN | NaN | 24.545260 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 30.085974 | 2266.771362 | NaN |
| min | NaN | NaN | NaN | NaN | 1.000000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 18.250000 | 18.800000 | NaN |
| 25% | NaN | NaN | NaN | NaN | 9.000000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 35.587500 | 401.450000 | NaN |
| 50% | NaN | NaN | NaN | NaN | 29.000000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 70.350000 | 1397.475000 | NaN |
| 75% | NaN | NaN | NaN | NaN | 55.000000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 89.862500 | 3794.737500 | NaN |
| max | NaN | NaN | NaN | NaN | 72.000000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 118.750000 | 8684.800000 | NaN |
df
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | No | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | Male | No | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.50 | No |
| 2 | Male | No | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | Male | No | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | Female | No | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | Male | No | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.50 | No |
| 7039 | Female | No | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.90 | No |
| 7040 | Female | No | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
| 7041 | Male | Yes | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.60 | Yes |
| 7042 | Male | No | No | No | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.50 | No |
7032 rows × 20 columns
# df1 is a second name for the same DataFrame object as df (no copy is made),
# so the encoded Churn column written below is also what later cells see
# when they read df['Churn'].
df1 = df
# Encode the target as 1 (churned) / 0 (stayed).
# The column is assigned explicitly instead of calling
# df1['Churn'].replace(..., inplace=True): an in-place method on a selected
# column is deprecated chained assignment and does not update the frame once
# pandas Copy-on-Write is active. The identity entries (1 -> 1, 0 -> 0) keep
# the cell safe to re-run after the column has already been encoded.
churn_codes = {'Yes': 1, 'No': 0, 1: 1, 0: 0}
df1['Churn'] = df1['Churn'].map(churn_codes)
# One-hot encode the remaining categorical columns; numeric columns
# (including the encoded Churn) pass through unchanged.
df_dummies = pd.get_dummies(df1)
df_dummies.head()
| tenure | MonthlyCharges | TotalCharges | Churn | gender_Female | gender_Male | SeniorCitizen_No | SeniorCitizen_Yes | Partner_No | Partner_Yes | ... | StreamingMovies_Yes | Contract_Month-to-month | Contract_One year | Contract_Two year | PaperlessBilling_No | PaperlessBilling_Yes | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 29.85 | 29.85 | 0 | True | False | True | False | False | True | ... | False | True | False | False | False | True | False | False | True | False |
| 1 | 34 | 56.95 | 1889.50 | 0 | False | True | True | False | True | False | ... | False | False | True | False | True | False | False | False | False | True |
| 2 | 2 | 53.85 | 108.15 | 1 | False | True | True | False | True | False | ... | False | True | False | False | False | True | False | False | False | True |
| 3 | 45 | 42.30 | 1840.75 | 0 | False | True | True | False | True | False | ... | False | False | True | False | True | False | True | False | False | False |
| 4 | 2 | 70.70 | 151.65 | 1 | True | False | True | False | True | False | ... | False | True | False | False | False | True | False | False | True | False |
5 rows × 47 columns
# Bar chart of every encoded column's correlation with Churn, ordered from
# the most positive to the most negative.
plt.figure(figsize=(15, 8))
sns.set(style='white')
churn_correlations = df_dummies.corr()['Churn']
churn_correlations.sort_values(ascending=False).plot.bar()
<Axes: >
!pip install sweetviz
Requirement already satisfied: sweetviz in d:\anoconda\lib\site-packages (2.3.1) Requirement already satisfied: pandas!=1.0.0,!=1.0.1,!=1.0.2,>=0.25.3 in d:\anoconda\lib\site-packages (from sweetviz) (2.0.3) Requirement already satisfied: numpy>=1.16.0 in d:\anoconda\lib\site-packages (from sweetviz) (1.24.3) Requirement already satisfied: matplotlib>=3.1.3 in d:\anoconda\lib\site-packages (from sweetviz) (3.7.2) Requirement already satisfied: tqdm>=4.43.0 in d:\anoconda\lib\site-packages (from sweetviz) (4.65.0) Requirement already satisfied: scipy>=1.3.2 in d:\anoconda\lib\site-packages (from sweetviz) (1.11.1) Requirement already satisfied: jinja2>=2.11.1 in d:\anoconda\lib\site-packages (from sweetviz) (3.1.2) Requirement already satisfied: importlib-resources>=1.2.0 in d:\anoconda\lib\site-packages (from sweetviz) (6.1.1) Requirement already satisfied: MarkupSafe>=2.0 in d:\anoconda\lib\site-packages (from jinja2>=2.11.1->sweetviz) (2.1.1) Requirement already satisfied: contourpy>=1.0.1 in d:\anoconda\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (1.0.5) Requirement already satisfied: cycler>=0.10 in d:\anoconda\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (0.11.0) Requirement already satisfied: fonttools>=4.22.0 in d:\anoconda\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (4.25.0) Requirement already satisfied: kiwisolver>=1.0.1 in d:\anoconda\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (1.4.4) Requirement already satisfied: packaging>=20.0 in d:\anoconda\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (23.1) Requirement already satisfied: pillow>=6.2.0 in d:\anoconda\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (9.4.0) Requirement already satisfied: pyparsing<3.1,>=2.3.1 in d:\anoconda\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (3.0.9) Requirement already satisfied: python-dateutil>=2.7 in d:\anoconda\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (2.8.2) Requirement already satisfied: pytz>=2020.1 
in d:\anoconda\lib\site-packages (from pandas!=1.0.0,!=1.0.1,!=1.0.2,>=0.25.3->sweetviz) (2023.3.post1) Requirement already satisfied: tzdata>=2022.1 in d:\anoconda\lib\site-packages (from pandas!=1.0.0,!=1.0.1,!=1.0.2,>=0.25.3->sweetviz) (2023.3) Requirement already satisfied: colorama in d:\anoconda\lib\site-packages (from tqdm>=4.43.0->sweetviz) (0.4.6) Requirement already satisfied: six>=1.5 in d:\anoconda\lib\site-packages (from python-dateutil>=2.7->matplotlib>=3.1.3->sweetviz) (1.16.0)
import sweetviz as sv
# Profile the DataFrame with Sweetviz and save/show the resulting report.
report_target = 'report_html'
my_report = sv.analyze(df)
my_report.show_html(report_target)
| | [ 0%] 00:00 ->…
Report report_html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.
# Side-by-side donut charts of the gender split and the churn split.
# Labels are read from the value_counts() index so each label is guaranteed
# to belong to its own count, whatever order the frequencies come back in.
gender_counts = df['gender'].value_counts()
# Churn may be stored as 0/1 or as No/Yes; display it as No/Yes either way.
churn_counts = df['Churn'].replace({0: 'No', 1: 'Yes'}).value_counts()
gender_labels = gender_counts.index.tolist()
churn_labels = churn_counts.index.tolist()

# 'domain' subplots are required for pie traces.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
fig.add_trace(go.Pie(labels=gender_labels, values=gender_counts, name='Gender'), 1, 1)
fig.add_trace(go.Pie(labels=churn_labels, values=churn_counts, name='Churn'), 1, 2)
# hole turns each pie into a donut so a caption fits in the middle.
fig.update_traces(hole=.55, hoverinfo="label+percent+name", textfont_size=16)
fig.update_layout(
    title_text="Gender and Churn Distributions",
    # One caption per donut; the right-hand one names the Churn chart.
    annotations=[dict(text='Gender', x=0.16, y=0.5, font_size=20, showarrow=False),
                 dict(text='Churn', x=0.83, y=0.5, font_size=20, showarrow=False)])
fig.data[0].marker.colors = ('#7fcdff', '#326ada')
fig.data[1].marker.colors = ('#56c175', '#ff9b35')
fig.show()
#color_discrete_map = {"Month-to-month": "#7fcdff", "One year": "#326ada", "Two year": "#ff9b355"}
# Grouped bar chart: customers per Churn value, split by contract type.
fig = px.histogram(
    data_frame=df,
    x='Churn',
    color='Contract',
    barmode='group',
)
fig.update_layout(bargap=0.1, height=500, width=700)
fig.show()
75% of customers with a Month-to-Month contract opted to move out, compared with 13% of customers on a One Year contract and 3% of customers on a Two Year contract.
# Bar chart: customers per Churn value, split by payment method, with the
# count printed on each bar.
fig = px.histogram(
    data_frame=df,
    x='Churn',
    color="PaymentMethod",
    title="<b>Churn distribution w.r.t Custome Payment Method</b>",
    text_auto=True,
)
fig.update_layout(bargap=0.1, height=500, width=700)
# Only the first three traces are recolored; any further trace keeps the
# color plotly assigned to it.
payment_palette = ('#7fcdff', '#ff9b35', '#56c175')
for trace_position, shade in enumerate(payment_palette):
    fig.data[trace_position].marker.color = shade
fig.show()
The majority of the customers who moved out used Electronic Check as their payment method, while customers who paid by Credit-Card automatic transfer, Bank Automatic Transfer, or Mailed Check were less likely to switch.
# Count male customers for each (InternetService, Churn) combination.
df.loc[df['gender'] == "Male", ["InternetService", "Churn"]].value_counts()
InternetService Churn DSL 0 992 Fiber optic 0 910 No 0 717 Fiber optic 1 633 DSL 1 240 No 1 57 Name: count, dtype: int64
# Count female customers for each (InternetService, Churn) combination.
df.loc[df['gender'] == "Female", ["InternetService", "Churn"]].value_counts()
InternetService Churn DSL 0 965 Fiber optic 0 889 No 0 690 Fiber optic 1 664 DSL 1 219 No 1 56 Name: count, dtype: int64
# Grouped bar chart of customer counts by churn status and gender, with one
# trace per internet-service type.
fig = go.Figure()
colors = {'Female': 'steelblue', 'Male': 'firebrick'}  # defined here but not used by this figure

# Bar heights are computed from df instead of being typed in by hand, so the
# chart cannot drift from the data (a hand-copied count was previously wrong).
# Churn may be stored as 0/1 or as No/Yes; reduce it to a boolean either way.
has_churned = df['Churn'].isin([1, 'Yes'])
service_counts = df.groupby(['InternetService', has_churned, 'gender']).size()

# Two-level categorical x axis: churn status on the outside, gender inside.
bar_groups = [['Churn:No', 'Churn:No', 'Churn:Yes', 'Churn:Yes'],
              ['Female', 'Male', 'Female', 'Male']]

# (value in the InternetService column, legend name, bar color)
service_traces = [
    ('DSL', 'DSL', '#7fcdff'),
    ('Fiber optic', 'Fiber optic', '#326ada'),
    ('No', 'No Internet', '#ff9b35'),
]
for service, trace_name, bar_color in service_traces:
    # Enumerate the four bars in the same order as bar_groups; a combination
    # that does not occur in the data is drawn with height 0.
    wanted = pd.MultiIndex.from_product([[service], [False, True], ['Female', 'Male']])
    heights = service_counts.reindex(wanted, fill_value=0).tolist()
    fig.add_trace(go.Bar(x=bar_groups, y=heights, name=trace_name, marker_color=bar_color))

fig.update_layout(title_text="<b>Churn Distribution w.r.t Internet Service and Gender</b>")
fig.show()
Fiber optic service was chosen by a lot of customers, and it's evident that there's a high churn rate among these customers. This could expose an issue in the Fiber optic service that dissatisfied most of its customers; looking further into the issue might lead to a better and more apt solution. Customers who opted for DSL service are larger in number and are found to have a lower churn rate compared to Fiber optic service.
# Grouped bar chart: customers per Churn value, split by Dependents.
fig = px.histogram(
    data_frame=df,
    x='Churn',
    color='Dependents',
    barmode='group',
    title='<b>Churn distribution w.r.t. Dependents</b>',
)
fig.update_layout(bargap=0.1, height=500, width=700)
fig.show()
Customers without dependents are more likely to churn
# Grouped bar chart: customers per Churn value, split by Partner, with a
# fixed color for each Partner value.
color_map = {
    "Yes": "#7fcdff",
    "No": "#326ada",
}
fig = px.histogram(
    data_frame=df,
    x='Churn',
    color='Partner',
    barmode="group",
    title='<b>Churn distribution w.r.t. Partners</b>',
    color_discrete_map=color_map,
)
fig.update_layout(bargap=0.1, height=500, width=700)
fig.show()
# Bar chart: customers per Churn value, split by SeniorCitizen, with a fixed
# color for each SeniorCitizen value (no barmode is set for this chart).
color_map = {
    "Yes": '#7fcdff',
    "No": '#326ada',
}
fig = px.histogram(
    data_frame=df,
    x='Churn',
    color='SeniorCitizen',
    title="<b>Churn distribution w.r.t Senior Citizen</b>",
    color_discrete_map=color_map,
)
fig.update_layout(bargap=0.1, height=500, width=700)
fig.show()
From the visuals above, it can be concluded that customers without dependents and customers who have partners are more likely to churn, while senior citizens show the highest churn.
# Grouped bar chart: customers per Churn value, split by OnlineSecurity,
# with a fixed color for each OnlineSecurity value.
color_map = {
    "Yes": "#7fcdff",
    "No": "#326ada",
    "No internet service": "#ff9b35",
}
fig = px.histogram(
    data_frame=df,
    x="Churn",
    color="OnlineSecurity",
    barmode="group",
    title="<b>Churn w.r.t online security</b>",
    color_discrete_map=color_map,
)
fig.update_layout(bargap=0.1, height=500, width=700)
fig.show()
The absence of online security makes most customers churn.
# Grouped bar chart: customers per Churn value, split by PaperlessBilling,
# with a fixed color for each PaperlessBilling value.
color_map = {
    "Yes": "#7fcdff",
    "No": "#326ada",
}
fig = px.histogram(
    data_frame=df,
    x="Churn",
    color="PaperlessBilling",
    barmode="group",
    title="<b>Churn distribution w.r.t Paperless Billing</b>",
    color_discrete_map=color_map,
)
fig.update_layout(bargap=0.1, height=500, width=700)
fig.show()
Paperless Billing seems to be one of the reasons why customers are most likely to churn.
# Grouped bar chart: customers per Churn value, split by TechSupport, with a
# fixed color for each TechSupport value.
color_map = {
    "Yes": "#7fcdff",
    "No": "#326ada",
    "No internet service": "#ff9b35",
}
fig = px.histogram(
    data_frame=df,
    x='Churn',
    color='TechSupport',
    barmode='group',
    title='<b>Churn distribution w.r.t Techsupport</b>',
    color_discrete_map=color_map,
)
fig.update_layout(bargap=0.1, height=500, width=700)
fig.show()
The absence of online security, the use of Paperless Billing, and services with no TechSupport show a similar trend among the customers who are most likely to churn.
# Bar chart: customers per Churn value, split by PhoneService, with a fixed
# color for each PhoneService value (no barmode is set for this chart).
color_map = {
    "Yes": '#7fcdff',
    "No": '#326ada',
}
fig = px.histogram(
    data_frame=df,
    x="Churn",
    color="PhoneService",
    title="<b>Churn distribution w.r.t Phone service</b>",
    color_discrete_map=color_map,
)
fig.update_layout(bargap=0.1, height=500, width=700)
fig.show()
Even though they are only a small fraction of customers, it is worth pointing out that customers who don't have a phone service are more likely to churn.
Conclusions as a Data Analyst: ● 75% of customers with a Month-to-Month contract opted to move out, compared with 13% of customers on a One Year contract and 3% of customers on a Two Year contract. ● The majority of the customers who moved out used Electronic Check as their payment method, while customers who paid by Credit-Card automatic transfer, Bank Automatic Transfer, or Mailed Check were less likely to switch. ● Fiber optic service was chosen by a lot of customers, and it's evident that there's a high churn rate among these customers. This could expose an issue in the Fiber optic service that dissatisfied most of its customers; looking further into the issue might lead to a better and more apt solution. ● Customers who opted for DSL service are larger in number and are found to have a lower churn rate compared to Fiber optic service. ● Customers without dependents and customers who have partners are more likely to churn, while senior citizens show the highest churn. ● The absence of online security, the use of Paperless Billing, and services with no TechSupport show a similar trend among the customers who are most likely to churn. ● There's a small fraction of customers who are more likely to churn, and it has been found that they don't have a phone service. The Data Analyst's work is done here. Next comes the Data Scientist, who builds a model to predict churn in future data.